Demonstration of Clustered Data

Andy Grogan-Kaylor
2022-01-12
Show code
# library(Rcmdr)

library(foreign) # import data

# library(googleVis)

library(lme4) # MLM

library(ggplot2) # beautiful graphs

library(ggthemes) # nice themes

library(plotly) # animated graphs

library(broom)

library(pander) # nice tables

library(sjPlot) # nice tables for MLM
Show code
# get the data sets

group_dataset <- 
  read.dta("demo group.dta", 
           convert.dates=TRUE, convert.factors=TRUE, missing.type=TRUE, 
           convert.underscore=TRUE, warn.missing.labels=TRUE)

individual_dataset <- 
  read.dta("demo individual.dta", 
           convert.dates=TRUE, convert.factors=TRUE, missing.type=TRUE, 
           convert.underscore=TRUE, warn.missing.labels=TRUE)

Grouped and Individual Data

“The data were generated from random numbers, and there is no relation between X and Y at all. Firstly, values of X and Y were generated for each ‘subject,’ then a further random number was added to make the individual observation.”

From Bland and Altman, BMJ, 1994, 308, 896.

So… we follow their procedure.

Grouped Data

Show code
p1 <- ggplot(group_dataset,
             aes(x = x, y = y, 
                 color = factor(groupnum),
                 label = groupnum)) +
  geom_point(size = 10, show.legend = FALSE) + 
  geom_text(color="white",
            show.legend = FALSE) +
  ggtitle("Grouped Data") +
  theme_minimal() +
  scale_color_viridis_d() +
  # scale_color_brewer(palette = "Set1") +
  xlim(0,100) +
  ylim(-25, 125) +
  theme(legend.position = "none")

ggplotly(p1)

Individual Data

Show code
p2 <- ggplot(individual_dataset, 
             aes(x = x.new, y = y.new,
                 color = factor(groupnum),
                 label = groupnum)) +
  geom_point(size = 5, show.legend = FALSE) + 
  geom_text(color="white", 
            show.legend = FALSE) +
  ggtitle("Individual Data") +
  theme_minimal() +
  scale_color_viridis_d() +
  # scale_color_brewer(palette = "Set1") +
  xlim(0,100) +
  ylim(-25, 125) +
  theme(legend.position = "none")

ggplotly(p2)

Analyses

OLS

Show code
myOLS <- lm(y.new ~x.new, data = individual_dataset)

# summary(myOLS)

sjPlot::tab_model(myOLS)
  y.new
Predictors Estimates CI p
(Intercept) -4.87 -35.66 – 25.92 0.746
x new 0.92 0.43 – 1.40 0.001
Observations 25
R2 / R2 adjusted 0.399 / 0.372
Show code
# pander(tidy(myOLS))

MLM

Show code
myMLM <- lmer(y.new ~x.new + (1 | groupnum), 
                              data = individual_dataset)

# summary(myMLM)

sjPlot::tab_model(myMLM)
  y.new
Predictors Estimates CI p
(Intercept) 27.26 -9.67 – 64.18 0.140
x new 0.38 -0.04 – 0.79 0.072
Random Effects
σ2 46.83
τ00 groupnum 858.47
ICC 0.95
N groupnum 5
Observations 25
Marginal R2 / Conditional R2 0.070 / 0.952
Show code
# pander(tidy(myMLM))